{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# hospital\n",
    "\n",
    "The hospital table is a very simple table that summarizes high level hospital information. Unlike other tables, it does not contain any patient identifiers, and instead can only be joined to the patient table using `hospitalid`.\n",
    "\n",
    "**Note: many hospitals described in the hospital table have 0 patient admissions in the patient table**."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/alistairewj/.local/lib/python3.5/site-packages/psycopg2/__init__.py:144: UserWarning: The psycopg2 wheel package will be renamed from release 2.8; in order to keep installing from binary please use \"pip install psycopg2-binary\" instead. For details see: <http://initd.org/psycopg/docs/install.html#binary-install-from-pypi>.\n",
      "  \"\"\")\n"
     ]
    }
   ],
   "source": [
    "# Import libraries\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import psycopg2\n",
    "import getpass\n",
    "import pdvega\n",
    "\n",
    "# for configuring connection \n",
    "from configobj import ConfigObj\n",
    "import os\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Database: eicu\n",
      "Username: alistairewj\n"
     ]
    }
   ],
   "source": [
    "# Create a database connection using settings from config file\n",
    "config='../db/config.ini'\n",
    "\n",
    "# connection info\n",
    "conn_info = dict()\n",
    "if os.path.isfile(config):\n",
    "    config = ConfigObj(config)\n",
    "    conn_info[\"sqluser\"] = config['username']\n",
    "    conn_info[\"sqlpass\"] = config['password']\n",
    "    conn_info[\"sqlhost\"] = config['host']\n",
    "    conn_info[\"sqlport\"] = config['port']\n",
    "    conn_info[\"dbname\"] = config['dbname']\n",
    "    conn_info[\"schema_name\"] = config['schema_name']\n",
    "else:\n",
    "    conn_info[\"sqluser\"] = 'postgres'\n",
    "    conn_info[\"sqlpass\"] = ''\n",
    "    conn_info[\"sqlhost\"] = 'localhost'\n",
    "    conn_info[\"sqlport\"] = 5432\n",
    "    conn_info[\"dbname\"] = 'eicu'\n",
    "    conn_info[\"schema_name\"] = 'public,eicu_crd'\n",
    "    \n",
    "# Connect to the eICU database\n",
    "print('Database: {}'.format(conn_info['dbname']))\n",
    "print('Username: {}'.format(conn_info[\"sqluser\"]))\n",
    "if conn_info[\"sqlpass\"] == '':\n",
    "    # try connecting without password, i.e. peer or OS authentication\n",
    "    try:\n",
    "        if (conn_info[\"sqlhost\"] == 'localhost') & (conn_info[\"sqlport\"]=='5432'):\n",
    "            con = psycopg2.connect(dbname=conn_info[\"dbname\"],\n",
    "                                   user=conn_info[\"sqluser\"])            \n",
    "        else:\n",
    "            con = psycopg2.connect(dbname=conn_info[\"dbname\"],\n",
    "                                   host=conn_info[\"sqlhost\"],\n",
    "                                   port=conn_info[\"sqlport\"],\n",
    "                                   user=conn_info[\"sqluser\"])\n",
    "    except:\n",
    "        conn_info[\"sqlpass\"] = getpass.getpass('Password: ')\n",
    "\n",
    "        con = psycopg2.connect(dbname=conn_info[\"dbname\"],\n",
    "                               host=conn_info[\"sqlhost\"],\n",
    "                               port=conn_info[\"sqlport\"],\n",
    "                               user=conn_info[\"sqluser\"],\n",
    "                               password=conn_info[\"sqlpass\"])\n",
    "query_schema = 'set search_path to ' + conn_info['schema_name'] + ';'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Compare data completion for hospitals with and without admissions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>hospitalid</th>\n",
       "      <th>numbedscategory</th>\n",
       "      <th>teachingstatus</th>\n",
       "      <th>region</th>\n",
       "      <th>n</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>195</td>\n",
       "      <td>250-500</td>\n",
       "      <td>True</td>\n",
       "      <td>South</td>\n",
       "      <td>1174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>251</td>\n",
       "      <td>&lt;100</td>\n",
       "      <td>False</td>\n",
       "      <td>Midwest</td>\n",
       "      <td>318</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>402</td>\n",
       "      <td>&lt;100</td>\n",
       "      <td>False</td>\n",
       "      <td>West</td>\n",
       "      <td>405</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>120</td>\n",
       "      <td>&lt;100</td>\n",
       "      <td>False</td>\n",
       "      <td>South</td>\n",
       "      <td>111</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>171</td>\n",
       "      <td>250-500</td>\n",
       "      <td>False</td>\n",
       "      <td>West</td>\n",
       "      <td>2807</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   hospitalid numbedscategory teachingstatus   region     n\n",
       "0         195         250-500           True    South  1174\n",
       "1         251            <100          False  Midwest   318\n",
       "2         402            <100          False     West   405\n",
       "3         120            <100          False    South   111\n",
       "4         171         250-500          False     West  2807"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "query = query_schema + \"\"\"\n",
    "with tt as\n",
    "(\n",
    "select hospitalid, count(*) as n\n",
    "from patient\n",
    "group by hospitalid\n",
    ")\n",
    "select h.*, coalesce(tt.n, 0) as n\n",
    "from hospital h\n",
    "left join tt\n",
    "on h.hospitalid = tt.hospitalid\n",
    "\"\"\"\n",
    "\n",
    "df = pd.read_sql_query(query, con)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "251 hospitals have 0 admissions\n"
     ]
    }
   ],
   "source": [
    "print('{} hospitals have 0 admissions.'.format(df.loc[df['n']==0,'hospitalid'].nunique()))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Data completion among hospitals with admissions\n",
    "\n",
    "First impute 'missing' so that our groupby reports on the number of hospitals with missing data. Also, we define a convenience function for reporting the absolute count and percent of the total."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "def count_with_percent(x, N):\n",
    "    return '{:3d} ({:5.2f}%)'.format(x.count(), x.count()*100.0/N)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "for c in ['region','numbedscategory','teachingstatus','region']:\n",
    "    df[c].fillna('Missing',inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "region\n",
      "Midwest       70 (33.65%)\n",
      "Missing       25 (12.02%)\n",
      "Northeast     15 ( 7.21%)\n",
      "South         56 (26.92%)\n",
      "West          42 (20.19%)\n",
      "Name: hospitalid, dtype: object\n",
      "\n",
      "numbedscategory\n",
      "100-249     66 (31.73%)\n",
      "250-500     32 (15.38%)\n",
      "<100        50 (24.04%)\n",
      ">500        24 (11.54%)\n",
      "Missing     36 (17.31%)\n",
      "Name: hospitalid, dtype: object\n",
      "\n",
      "teachingstatus\n",
      "False      168 (80.77%)\n",
      "True        17 ( 8.17%)\n",
      "Missing     23 (11.06%)\n",
      "Name: hospitalid, dtype: object\n"
     ]
    }
   ],
   "source": [
    "idx = df['n']!=0\n",
    "N = np.sum(idx)\n",
    "\n",
    "for c in ['region','numbedscategory','teachingstatus']:\n",
    "    grp = df.loc[idx, :].groupby(c)['hospitalid']\n",
    "    \n",
    "    print('')\n",
    "    print(grp.apply(count_with_percent, N))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Above we can see that the frequency of missing data ranges between 11-18% in the hospital table."
   ]
  }
 ],
 "metadata": {
  "front-matter": {
   "date": "2015-09-01",
   "linktitle": "admissiondrug",
   "title": "admissiondrug"
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
